source(here::here("text_code/text_packages_rep.R"))


# Load the ANES 2016 raw inputs.
# Closed-ended responses come from a pipe-delimited text file.
a16_og <- read.delim(
    here("text_data_raw", "ANES", "anes_timeseries_2016_rawdata.txt"),
    sep = "|"
)

# Open-ended candidate responses: one workbook sheet per question, with the
# column names normalised by clean_names().
a16_open1 <- clean_names(read_excel( # like dem
    path  = here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx"),
    sheet = "V161069"
))

a16_open2 <- clean_names(read_excel( # dislike dem
    path  = here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx"),
    sheet = "V161072"
))

a16_open3 <- clean_names(read_excel( # like rep
    path  = here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx"),
    sheet = "V161075"
))

a16_open4 <- clean_names(read_excel( # dislike rep
    path  = here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx"),
    sheet = "V161078"
))

# deprecated due to change in ANES data release, Problems_Collected removed
# a16_problems <- read_excel( #major problems in US
#     here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx"),
#     sheet = "Problems_Collected") %>% 
#     clean_names() 

# Workbook holding the open-ended responses.
path_xlsx <- here("text_data_raw", "ANES", "anes_timeseries_2016_redacted_openends.xlsx")

# The four sheets that replace the removed "Problems_Collected" sheet.
target_sheets <- c("V162116a", "V162118a", "V162120a", "V162122a")

# Read every sheet into a list of tables. Each table gets cleaned column names,
# and its respondent ID column is renamed to a common name and stored as
# integer so the tables can be joined on it.
probs_list <- purrr::map(
    target_sheets,
    function(sheet_name) {
        sheet_tbl <- clean_names(read_excel(path_xlsx, sheet = sheet_name))
        sheet_tbl <- rename(
            sheet_tbl,
            hover_here_for_note_v160001 = matches("(?i)v160001|hover_here_for_note_v160001")
        )
        mutate(
            sheet_tbl,
            hover_here_for_note_v160001 = as.integer(hover_here_for_note_v160001)
        )
    }
)

# Fold the list into one wide table, full-joining on the respondent ID so an
# ID that appears in any sheet is kept.
a16_problems <- purrr::reduce(
    probs_list,
    function(joined, next_tbl) {
        full_join(joined, next_tbl, by = "hover_here_for_note_v160001")
    }
)


# Validated voter turnout file (CSV).
a16_vote <- read.csv(
    here("text_data_raw", "ANES", "anes_timeseries_2016_voteval.csv")
)

# Combine the four candidate open-ended tables on the respondent ID column.
# merge() is called with its defaults, so only IDs present in both inputs of
# each step are kept.
q1 <- a16_open1 %>% merge(a16_open2, by = "hover_here_for_note_v160001")
q2 <- q1 %>% merge(a16_open3, by = "hover_here_for_note_v160001")
q3 <- q2 %>% merge(a16_open4, by = "hover_here_for_note_v160001")

# Integer copy of the ID under the key name used by the closed-ended data.
q3$V160001_orig <- as.integer(q3$hover_here_for_note_v160001)

a16 <- a16_og %>% left_join(q3, by = "V160001_orig")

# Same key for the most-important-problem responses, then attach them.
a16_problems$V160001_orig <- as.integer(a16_problems$hover_here_for_note_v160001)

a16 <- a16 %>%
    left_join(a16_problems, by = "V160001_orig") %>%
    # both joined tables carried the raw ID column, hence the .x/.y pair
    select(-hover_here_for_note_v160001.x, -hover_here_for_note_v160001.y)

# Vote validation: keep the key (renamed to match a16) and the three
# validation columns, then attach them.
a16_vote <- a16_vote %>%
    select(V160001_orig = V160001, vote2016, vote2016_prob, prob_match)

a16 <- a16 %>%
    left_join(a16_vote, by = "V160001_orig") %>%
    mutate(
        # arbitrary cutoff: counted as a validated voter when the turnout
        # probability exceeds .95
        vote_validated16 = ifelse(vote2016_prob > .95, 1, 0)
    )

# Give the open-ended text columns short names (new name = old name).
a16 <- rename(
  a16,
  all_of(c(
    like_dem    = "v161069_pre_what_is_it_that_r_likes_about_democratic_pres_cand",
    dislike_dem = "v161072_pre_what_is_it_that_r_dislikes_about_democratic_pres_cand",
    like_rep    = "v161075_pre_what_is_it_that_r_likes_about_republican_pres_cand",
    dislike_rep = "v161078_pre_what_is_it_that_r_dislikes_about_republican_pres_cand",

    problem1    = "v162116a_post_mention_1_most_important_problems_facing_the_country",
    problem2    = "v162118a_post_mention_2_most_important_problems_facing_the_country",
    problem3    = "v162120a_post_mention_3_most_important_problems_facing_the_country",
    problem4    = "v162122a_post_which_among_mentions_is_the_most_important_problem"
  ))
)

# Gender resentment items (V161507-V161510) and the sexism16 scale.
# Codes 1-5 are reversed through a lookup (1 -> 5, ..., 5 -> 1); every other
# code, including NA, becomes NA. sexism16 is the row mean of whichever
# recoded items are non-missing.
a16 <- a16 %>%
    mutate(
        gendres1 = c(5, 4, 3, 2, 1)[match(V161507, 1:5)],
        gendres2 = c(5, 4, 3, 2, 1)[match(V161508, 1:5)],
        gendres3 = c(5, 4, 3, 2, 1)[match(V161509, 1:5)],
        gendres4 = c(5, 4, 3, 2, 1)[match(V161510, 1:5)]
    ) %>%
    mutate(
        sexism16 = rowMeans(select(., gendres1, gendres2, gendres3, gendres4), na.rm = TRUE)
    )

# Racial resentment battery (V162211-V162214; labelled post-election by the
# original author). Two items are reversed (favors, harder) and two keep their
# 1-5 codes (slavery, deserve); any other code becomes NA.
# racial_resent16 is the row mean of the non-missing recoded items.
a16 <- a16 %>%
    mutate(
        rr_favors16  = c(5, 4, 3, 2, 1)[match(V162211, 1:5)],
        rr_slavery16 = c(1, 2, 3, 4, 5)[match(V162212, 1:5)],
        rr_deserve16 = c(1, 2, 3, 4, 5)[match(V162213, 1:5)],
        rr_harder16  = c(5, 4, 3, 2, 1)[match(V162214, 1:5)]
    ) %>%
    mutate(
        racial_resent16 = rowMeans(
            select(., rr_favors16, rr_slavery16, rr_deserve16, rr_harder16),
            na.rm = TRUE
        )
        # low_racial_resent  = ifelse(racial_resent16 <= 2.25, 1, 0),
        # high_racial_resent = ifelse(racial_resent16 >= 3.75, 1, 0)
    )

# White identity battery (V162316, V162317, V162327, V162360; labelled
# post-election by the original author). All four items are reversed
# (1 -> 5, ..., 5 -> 1); codes outside 1-5 become NA.
# white_id is the row mean of the non-missing recoded items.
a16 <- a16 %>%
    mutate(
        white_id1 = c(5, 4, 3, 2, 1)[match(V162316, 1:5)],
        white_id2 = c(5, 4, 3, 2, 1)[match(V162317, 1:5)],
        white_id3 = c(5, 4, 3, 2, 1)[match(V162327, 1:5)],
        white_id4 = c(5, 4, 3, 2, 1)[match(V162360, 1:5)]
    ) %>%
    mutate(
        white_id = rowMeans(select(., white_id1, white_id2, white_id3, white_id4), na.rm = TRUE)
    )

# Authoritarianism battery (V162239-V162242; labelled post-election by the
# original author). Each item maps its codes 1/2/3 onto 1, 3 or 5 through a
# lookup; item 3 uses the opposite direction from the other three. Codes
# outside 1-3 become NA.
# authorit16 is the row mean of the non-missing recoded items.
a16 <- a16 %>%
    mutate(
        authorit1 = c(1, 5, 3)[match(V162239, 1:3)],
        authorit2 = c(1, 5, 3)[match(V162240, 1:3)],
        authorit3 = c(5, 1, 3)[match(V162241, 1:3)],
        authorit4 = c(1, 5, 3)[match(V162242, 1:3)]
    ) %>%
    mutate(
        authorit16 = rowMeans(select(., authorit1, authorit2, authorit3, authorit4), na.rm = TRUE)
        # low_authorit  = ifelse(authorit16 <= 2, 1, 0),
        # high_authorit = ifelse(authorit16 >= 4, 1, 0)
    )


# coding political attention (from pre-election survey)
# # PRE: How often does R pay attn to politics and elections



# Civic engagement battery (V162018a-d; labelled post-election by the original
# author). Code 1 becomes 1 and code 2 becomes 0; anything else becomes NA.
# civic is the row mean of the non-missing indicators.
a16 <- a16 %>%
    mutate(
        civic_rally    = c(1, 0)[match(V162018a, 1:2)],
        civic_petition = c(1, 0)[match(V162018b, 1:2)],
        civic_relig    = c(1, 0)[match(V162018c, 1:2)],
        civic_org      = c(1, 0)[match(V162018d, 1:2)]
    ) %>%
    mutate(
        civic = rowMeans(select(., civic_rally, civic_petition, civic_relig, civic_org), na.rm = TRUE)
    )



# Other engagement indicators (V162010-V162014, V162016, V162017).
# Code 1 becomes 1 and code 2 becomes 0; every other code ends up NA.
# engage_scale is the row mean of the non-missing indicators.
a16 <- a16 %>%
    mutate(
        engage_talk        = c(1, 0)[match(V162010, 1:2)],
        engage_rally       = c(1, 0)[match(V162011, 1:2)],
        engage_display     = c(1, 0)[match(V162012, 1:2)],
        engage_work        = c(1, 0)[match(V162013, 1:2)],
        engage_donate_cand = c(1, 0)[match(V162014, 1:2)],
        engage_donate_org  = c(1, 0)[match(V162016, 1:2)],
        engage_volunteer   = c(1, 0)[match(V162017, 1:2)]
    ) %>%
    mutate(
        engage_scale = rowMeans(
            select(
                ., engage_talk, engage_rally, engage_display, engage_work,
                engage_donate_cand, engage_donate_org, engage_volunteer
            ),
            na.rm = TRUE
        )
    )




# Deliberation-related items.
a16 <- a16 %>%
    mutate(
        # 1. Political discussion (binary): code 1 -> 1, code 2 -> 0, else NA
        discuss_any = c(1, 0)[match(V162174, 1:2)],

        # 2. Days discussed politics: values 0-7 kept, anything else NA
        discuss_pol_days = if_else(
            V162174a >= 0 & V162174a <= 7, as.numeric(V162174a), NA_real_
        ),
        # same item divided by 7, so it runs from 0 to 1
        discuss_days_scaled = if_else(
            V162174a >= 0 & V162174a <= 7, V162174a / 7, NA_real_
        ),

        # 3. Compromise attitude (binary: 1 = prefers compromise)
        prefers_compromise = c(1, 0)[match(V161171, 1:2)]
    ) %>%
    # The scale currently uses discuss_days_scaled only; discuss_any and
    # prefers_compromise are computed above but left out of it.
    mutate(
        deliberation_scale = rowMeans(select(., discuss_days_scaled), na.rm = TRUE)
    )


# Political knowledge items and their row-mean scale.
a16 <- a16 %>%
    mutate(
        # For these four items one specific positive code scores 1, any other
        # positive code scores 0, and non-positive codes are NA.
        pk_senator_term   = if_else(V161513 > 0, as.numeric(V161513 == 6), NA_real_),
        pk_least_spending = if_else(V161514 > 0, as.numeric(V161514 == 1), NA_real_),
        pk_house_party    = if_else(V161515 > 0, as.numeric(V161515 == 2), NA_real_),
        pk_senate_party   = if_else(V161516 > 0, as.numeric(V161516 == 2), NA_real_),
        # Codes 0 and 1 are kept as they are; every other code is NA.
        pk_biden          = c(0, 1)[match(V162072, 0:1)],
        pk_merkel         = c(0, 1)[match(V162074a, 0:1)],
        pk_putin          = c(0, 1)[match(V162075a, 0:1)],
        # partial credit with 0.5 coding: non-negative values pass through
        pk_roberts        = if_else(V162076a >= 0, as.numeric(V162076a), NA_real_),
        pk_ryan           = if_else(V162073b >= 0, as.numeric(V162073b), NA_real_)
    ) %>%
    mutate(
        pk_scale = rowMeans(
            select(
                ., pk_senator_term, pk_least_spending, pk_house_party, pk_senate_party,
                pk_biden, pk_ryan, pk_merkel, pk_putin, pk_roberts
            ),
            na.rm = TRUE
        )
    )



# Opinionated: two items with codes 1-5 rescaled to 0-1 via (x - 1) / 4; any
# other code is NA. nfc_proxy is the row mean of the non-missing items.
a16 <- a16 %>%
    mutate(
        # It bothers me to remain neutral
        nfc_bother_neutral = if_else(V162251 %in% 1:5, (V162251 - 1) / 4, NA_real_),
        # I have many more opinions than the average person
        nfc_more_opinions  = if_else(V162252 %in% 1:5, (V162252 - 1) / 4, NA_real_)
    ) %>%
    mutate(
        nfc_proxy = rowMeans(select(., nfc_bother_neutral, nfc_more_opinions), na.rm = TRUE)
    )

# 'politically correct' item (V161362): several labelled versions of the same
# 1-4 code. Labels are looked up by code position (codes outside 1-4 give NA)
# and then the factor levels are reordered so the first level is the "Low" /
# "Dismissal" end.
a16 <- a16 %>%
    mutate(
        pol_correct_fct = c(
            "High Concern for Offense",
            "Moderate Concern for Offense",
            "Moderate Dismissal of Offense",
            "Strong Dismissal of Offense"
        )[match(V161362, 1:4)] %>%
            forcats::fct_relevel(
                "Strong Dismissal of Offense", "Moderate Dismissal of Offense",
                "Moderate Concern for Offense", "High Concern for Offense"
            ),
        pol_correct_fct4 = c(
            "Norm Concern: High",
            "Norm Concern: Mod-High",
            "Norm Concern: Mod-Low",
            "Norm Concern: Low"
        )[match(V161362, 1:4)] %>%
            forcats::fct_relevel(
                "Norm Concern: Low", "Norm Concern: Mod-Low",
                "Norm Concern: Mod-High", "Norm Concern: High"
            ),
        # three-level version: codes 2 and 3 share one label
        pol_correct_fct3_16 = c(
            "Norm Concern: High",
            "Norm Concern: Moderate",
            "Norm Concern: Moderate",
            "Norm Concern: Low"
        )[match(V161362, 1:4)] %>%
            forcats::fct_relevel(
                "Norm Concern: Low", "Norm Concern: Moderate", "Norm Concern: High"
            ),
        # two-level version: codes 1-2 vs codes 3-4
        pol_correct_fct2 = c(
            "Norm Concern: High",
            "Norm Concern: High",
            "Norm Concern: Low",
            "Norm Concern: Low"
        )[match(V161362, 1:4)] %>%
            forcats::fct_relevel("Norm Concern: Low", "Norm Concern: High"),
        # numeric version: codes >= 1 kept as they are, everything else NA
        pol_correct_num = if_else(V161362 >= 1, as.numeric(V161362), NA_real_),
        pol_correct_num_fct = c("4 - High", "3", "2", "1 - Low")[match(V161362, 1:4)] %>%
            forcats::fct_relevel("1 - Low", "2", "3", "4 - High")
    )


# Coding political interest battery.
# NOTE(review): V161xxx variables are described as pre-election items elsewhere
# in this file — confirm the survey wave for this battery.
a16 <- a16 %>% 
    mutate(
        # Reverse and rescale to 0-1. Non-positive codes are set to NA first,
        # as news_pol does below; without the guard a negative code would
        # produce a value above 1 and feed into pol_int.
        pol_attn16    = ifelse(V161003 > 0, (5 - V161003) / 4, NA_real_),
        camp_int      = ifelse(V161004 > 0, (3 - V161004) / 2, NA_real_),
        
        pol_vote12    = case_when(
            V161005 == 1 ~ 1,
            TRUE         ~ 0 # collapses No, didn't vote, Don't know and Refused
        ),
        # days per week (non-negative values only), divided by 7
        news_days     = case_when(
            V161008 >= 0 ~ V161008,
            TRUE ~ NA_real_
        ) / 7,
        # positive codes reversed (5 - x), then divided by 4
        news_pol      = case_when(
            V161009 > 0 ~ 5-(V161009),
            TRUE ~ NA_real_
        ) / 4
    )

# pol_int: row mean of the non-missing interest items (pol_vote12 is not part
# of the scale).
a16 <- a16 %>% 
    mutate(
        pol_int = rowMeans(select(., pol_attn16, camp_int, news_days, news_pol), na.rm = TRUE)
    )


# Efficacy items (V162215-V162217): positive codes are transformed with
# (6 - x) / 5, so a larger raw code gives a smaller score; non-positive codes
# are NA. effic_scale is the row mean of the non-missing items.
a16 <- a16 %>%
    mutate(
        effic_care = if_else(V162215 > 0, (6 - (V162215)) / 5, NA_real_),
        effic_say  = if_else(V162216 > 0, (6 - (V162216)) / 5, NA_real_),
        effic_comp = if_else(V162217 > 0, (6 - (V162217)) / 5, NA_real_)
    ) %>%
    mutate(
        effic_scale = rowMeans(select(., effic_care, effic_say, effic_comp), na.rm = TRUE)
    )



# TIPI personality items, done as one pipeline:
#   1. recode each item: positive codes become (x - 1) / 6 for the "_pos"
#      items and 1 - (x - 1) / 6 for the "_neg" items; non-positive codes
#      are NA
#   2. average the two items of each Big Five trait
#   3. average agreeableness and conscientiousness into tipi_compliance
a16 <- a16 %>%
    mutate(
        # Extraversion
        tipi_extra_pos  = if_else(V162334 > 0, (V162334 - 1) / 6, NA_real_),
        tipi_extra_neg  = if_else(V162339 > 0, 1 - (V162339 - 1) / 6, NA_real_),

        # Agreeableness
        tipi_agree_pos  = if_else(V162336 > 0, (V162336 - 1) / 6, NA_real_),
        tipi_agree_neg  = if_else(V162341 > 0, 1 - (V162341 - 1) / 6, NA_real_),

        # Conscientiousness
        tipi_consc_pos  = if_else(V162335 > 0, (V162335 - 1) / 6, NA_real_),
        tipi_consc_neg  = if_else(V162340 > 0, 1 - (V162340 - 1) / 6, NA_real_),

        # Emotional Stability
        tipi_stable_pos = if_else(V162337 > 0, (V162337 - 1) / 6, NA_real_),
        tipi_stable_neg = if_else(V162342 > 0, 1 - (V162342 - 1) / 6, NA_real_),

        # Openness
        tipi_open_pos   = if_else(V162338 > 0, (V162338 - 1) / 6, NA_real_),
        tipi_open_neg   = if_else(V162343 > 0, 1 - (V162343 - 1) / 6, NA_real_)
    ) %>%
    mutate(
        tipi_extra  = rowMeans(select(., tipi_extra_pos, tipi_extra_neg), na.rm = TRUE),
        tipi_agree  = rowMeans(select(., tipi_agree_pos, tipi_agree_neg), na.rm = TRUE),
        tipi_consc  = rowMeans(select(., tipi_consc_pos, tipi_consc_neg), na.rm = TRUE),
        tipi_stable = rowMeans(select(., tipi_stable_pos, tipi_stable_neg), na.rm = TRUE),
        tipi_open   = rowMeans(select(., tipi_open_pos, tipi_open_neg), na.rm = TRUE)
    ) %>%
    mutate(
        tipi_compliance = rowMeans(select(., tipi_agree, tipi_consc), na.rm = TRUE)
    )


# Coding additional explanatory variables: demographics, ideology, party,
# interview metadata, and reported / validated vote measures.
# Uses gendres1-4 and vote_validated16, which are created earlier in this file.
a16 <- a16 %>% 
    mutate(
    # NOTE(review): the value label "female16" (here and in iwr_gender) looks
    # like a search/replace artifact; it is left unchanged because other code
    # may match on it — confirm.
    gender = case_when(
        V161342 == 1 ~ "male",
        V161342 == 2 ~ "female16",
        V161342 == 3 ~ "other",
        TRUE ~ NA_character_),
    gender_bin = case_when(
        V161342 == 1 ~ "male",
        V161342 == 2 ~ "female16",
        #V161342 == 3 ~ "other",
        TRUE ~ NA_character_),
    female16 = case_when(
      V161342 == 1 ~ "no",
      V161342 == 2 ~ "yes",
      V161342 == 3 ~ "no", # recode other to be "not female16"
      TRUE ~ NA_character_),
    male = case_when(
      V161342 == 1 ~ "yes",
      V161342 == 2 ~ "no",
      V161342 == 3 ~ "no", # recode other to be "not male"
      TRUE ~ NA_character_),
    race16 = case_when(
        V161310x == 1 ~ "white",
        V161310x == 2 ~ "black",
        V161310x == 3 ~ "asian",
        V161310x == 4 ~ "native_american",
        V161310x == 5 ~ "hispanic",
        V161310x == 6 ~ "other",
        TRUE ~ NA_character_),
    # four-category race: codes 3, 4 and 6 are pooled into "other"
    race4_16 = case_when(
      V161310x == 1 ~ "white",
      V161310x == 2 ~ "black",
      V161310x == 3 ~ "other",
      V161310x == 4 ~ "other",
      V161310x == 5 ~ "hispanic",
      V161310x == 6 ~ "other",
      TRUE ~ NA_character_),
    # Seven-point ideology, refined with the follow-up item V161127 for
    # respondents coded 4 or 99 on V161126.
    # case_when() uses the first matching condition, so the follow-up branches
    # must come before the generic `V161126 <= 8` pass-through; placed after
    # it, the `V161126 == 4 & ...` branches could never fire.
    ideo7_16 = case_when(
        V161126 <= 0 ~ NA_integer_,
        
        V161126 == 99 & V161127 == 1 ~ 2,
        V161126 == 4  & V161127 == 1 ~ 3,
        
        V161126 == 99 & V161127 == 2 ~ 6,
        V161126 == 4  & V161127 == 2 ~ 5,
        
        V161126 == 99 & V161127 == 3 ~ 4,
        V161126 == 4  & V161127 == 3 ~ 4,
        
        # remaining substantive codes pass through unchanged (including a 4
        # with no usable follow-up answer)
        V161126 <= 8 ~ V161126,
        TRUE ~ NA_integer_),
    # Three-category ideology. The top category is bounded at 7 so that a
    # large non-substantive code (the companion item V161126 uses 99, see
    # ideo7_16) cannot be labelled "Conservative".
    ideo3 = case_when(
        V162171 <= 0 ~ NA_character_,
        V162171 <= 2 ~ "Liberal", 
        V162171 >= 3 & V162171 <= 5 ~ "Moderate", 
        V162171 >= 6 & V162171 <= 7 ~ "Conservative",
        TRUE ~ NA_character_),
    party_reg = case_when(
      V161019 == -1 ~ "Unreg",
      V161019 == 1  ~ "Dem",
      V161019 == 2  ~ "Rep",
      V161019 == 4  ~ "Ind",
      V161019 == 5  ~ "Oth",
      TRUE ~ NA_character_),
    party_reg3 = case_when(
        V161019 == 1  ~ "Dem",
        V161019 == 2  ~ "Rep",
        V161019 == 4  ~ "Ind",
        TRUE ~ NA_character_),
    party_reg4 = case_when(
        V161019 == -1 ~ "Unreg",
        V161019 == 1  ~ "Dem",
        V161019 == 2  ~ "Rep",
        V161019 == 4  ~ "Ind",
        #V161019 == 5  ~ "other",
        TRUE ~ NA_character_),
    
    # partyid not party_reg
    # every code other than 1-3 (including any missing codes) becomes "Oth"
    party = case_when(
        #V161155 == 0 ~ "Other",
        V161155 == 1 ~ "Dem",
        V161155 == 2 ~ "Rep",
        V161155 == 3 ~ "Ind",
        #V161155 == 5 ~ "Oth",
        TRUE ~ "Oth"),
    pid7 = case_when(
        V161158x == 1 ~ "Strong Dem",
        V161158x == 2 ~ "Weak Dem",
        V161158x == 3 ~ "Ind-Dem",
        V161158x == 4 ~ "Ind",
        V161158x == 5 ~ "Ind-Rep",
        V161158x == 6 ~ "Weak Rep",
        V161158x == 7 ~ "Strong Rep",
        TRUE ~ "Other"),
    pid7_int = case_when(
        V161158x >= 1  & V161158x <= 7 ~ V161158x,
        TRUE ~ NA_integer_),
    # factor versions of party ID; the first level listed is the reference
    pid4_16 = case_when(
        V161155 == 1  ~ "Dem",
        V161155 == 2  ~ "Rep",
        V161155 == 3  ~ "Ind",
        TRUE ~ "Oth") %>% 
        fct_relevel("Ind", "Dem", "Rep", "Oth"),
    pid3_16 = case_when(
        V161155 == 1  ~ "Dem",
        V161155 == 2  ~ "Rep",
        V161155 == 3  ~ "Ind",
        TRUE ~ NA_character_) %>%
        fct_relevel("Ind", "Dem", "Rep"),
    pid2 = case_when(
        V161155 == 1  ~ "Dem",
        V161155 == 2  ~ "Rep",
        TRUE ~ NA_character_) %>% 
        fct_relevel("Dem", "Rep"),
    
    educ16 = ifelse(0 < V161270 & V161270 < 17, V161270, NA),
    age16       = ifelse(V161267x > 0, V161267x, NA),
    mode16      = ifelse(V160501 == 1, "ftf", "web"), #mode16 of interview (face to face or online)
    income16    = ifelse(V161361x > 0, V161361x, NA),
    iwr_gender  = case_when(     #interviewer gender in pre-election survey
        V168257 == 1 ~ "male",
        V168257 == 2 ~ "female16",
        TRUE ~ NA_character_),
    iwr_id = ifelse(V168251 == -1, NA, V168251),
    vote_dem_primary = case_when( #who R voted for in democratic presidential primary
        V161021a == 1 ~ "clinton",
        V161021a == 2 ~ "sanders",
        TRUE ~ NA_character_
    ),
    vote_reported = ifelse(V162034a > 0 | V161022a == 1, 1, 0), #whether R reported voting in 2016 pres election
    # cross-classify the self-report against the validated-vote flag
    vote_match = case_when(
      vote_reported == 1 & vote_validated16 == 1 ~ "match_vote",
      vote_reported == 0 & vote_validated16 == 0 ~ "match_novote",
      vote_reported == 1 & vote_validated16 == 0 ~ "nomatch_novote",
      vote_reported == 0 & vote_validated16 == 1 ~ "nomatch_vote",
      TRUE ~ NA_character_
    ),
    # 1 when report and validation disagree, 0 when they agree
    misreport = case_when(
      vote_match == "nomatch_novote" |
      vote_match == "nomatch_vote" ~ 1,
      vote_match == "match_novote" |
      vote_match == "match_vote" ~ 0,
      TRUE        ~ as.numeric(NA)
    ),
    vote_pres = case_when( #who R voted for in presidential election
        V162034a == 1 ~ "clinton",
        V162034a == 2 ~ "trump",
        V162034a == 3 ~ "johnson",
        V162034a == 4 ~ "stein",
        TRUE ~ NA_character_
    ),
    # two-candidate version: anything but clinton/trump is NA
    vote_pres_dr = case_when(
        vote_pres == "clinton" ~ "clinton",
        vote_pres == "trump"   ~ "trump",
        TRUE ~ NA_character_),
    vote_dem16 = ifelse(V162034a == 1, 1, 0), #dummy: voted for Clinton
    vote_rep16 = ifelse(V162034a == 2, 1, 0), #dummy: voted for Trump
    vote_pres_pre = case_when(  #who R expects to vote for (from pre-election survey)
        V161031 == 1 ~ "clinton", 
        V161031 == 2 ~ "trump",
        TRUE ~ NA_character_
    ),
    # NA whenever either vote_pres_pre or vote_pres is NA
    changed_mind = ifelse(vote_pres_pre == vote_pres, 0, 1), #dummy: voted for someone other than response in vote_pres_pre
    vote_pre_strongpref = case_when( #dummy: R has strong preference for candidate in vote_pres_pre
        V161032 == 1 ~ 1,
        V161032 == 2 ~ 0,
        TRUE ~ as.numeric(NA)
    ),
    all3 = ifelse(gendres1 == 3 & #R answered 3 for all gender resentment questions
                       gendres2 == 3 & 
                       gendres3 == 3 & 
                       gendres4 == 3, 1, 0)
)

# Creating columns for all dem text, all rep text, all text, and all problems.
# str_c() returns NA as soon as one input is NA (unlike paste(), which would
# insert the string "NA"), so a combined column is NA unless every part was
# answered.
# The parts are joined with a single space: with no separator the last word of
# one answer fuses with the first word of the next, which creates bogus tokens
# when these columns are tokenised.
# these should be deprecated
a16 <- a16 %>% mutate(
    dem = str_c(like_dem, dislike_dem, sep = " "),
    rep = str_c(like_rep, dislike_rep, sep = " "),
    all_text = str_c(dem, rep, sep = " "),
    
    all_problems = str_c(problem1, problem2, problem3, problem4, sep = " ")
)

# Tidytext versions of the combined text columns: each call tokenises one text
# column of a16 into a `word` column.
tidy_dem      <- unnest_tokens(a16, word, dem)
tidy_rep      <- unnest_tokens(a16, word, rep)
tidy_all      <- unnest_tokens(a16, word, all_text)
tidy_problems <- unnest_tokens(a16, word, all_problems)

# Logical vector flagging rows where V162116a equals -2. Per the original
# note, that code marks text responses kept in a separate file, so this could
# be used to filter for them.
prob_index <- a16$V162116a == -2

# Constant columns holding the nonresponse codes:
#   -50 for the affect (like/dislike) questions
#   -25 for the most-important-problem questions (mean nchar 31)
a16[["nonresp_value25"]] <- -25
a16[["nonresp_value50"]] <- -50


# Character counts for the four candidate like/dislike open-ended answers.
# A missing answer (NA) is counted as 0 characters, so the nchar_* columns
# created here are never NA.
a16 <- mutate(
    a16,
    nchar_like_dem    = ifelse(!is.na(like_dem),    nchar(like_dem),    0),
    nchar_dislike_dem = ifelse(!is.na(dislike_dem), nchar(dislike_dem), 0),
    nchar_like_rep    = ifelse(!is.na(like_rep),    nchar(like_rep),    0),
    nchar_dislike_rep = ifelse(!is.na(dislike_rep), nchar(dislike_rep), 0)
)

# Retired totals, kept for reference:
# nchar_total_deprecated       = ifelse(is.na(all_text),    0, nchar(all_text)),
# nchar_total      = nchar_like_dem + nchar_dislike_dem + nchar_like_rep + nchar_dislike_rep,

# Pooled counts and their log(x + 1) transforms.
#   lddr = like Dem + dislike Rep
#   lrdd = like Rep + dislike Dem
a16 <- mutate(
    a16,
    nchar_lddr16       = nchar_like_dem + nchar_dislike_rep,
    nchar_lrdd16       = nchar_like_rep + nchar_dislike_dem,

    nchar_like_dem_log = log(nchar_like_dem + 1),
    nchar_like_rep_log = log(nchar_like_rep + 1),
    nchar_lddr_log     = log(nchar_lddr16 + 1),
    nchar_lrdd_log     = log(nchar_lrdd16 + 1)
)

# Retired nonresponse-offset variants, kept for reference:
#rowSums(select(., nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4), na.rm = TRUE),
# nchar_nr_ld = ifelse(is.na(like_dem), nonresp_value50, nchar(like_dem)) + abs(nonresp_value50),
# nchar_nr_dd = ifelse(is.na(dislike_dem), nonresp_value50, nchar(dislike_dem)) + abs(nonresp_value50),
# nchar_nr_lr = ifelse(is.na(like_rep), nonresp_value50, nchar(like_rep)) + abs(nonresp_value50),
# nchar_nr_dr = ifelse(is.na(dislike_rep), nonresp_value50, nchar(dislike_rep)) + abs(nonresp_value50),
# nchar_nr_total = nchar_nr_ld + nchar_nr_dd + nchar_nr_lr + nchar_nr_dr,
# nchar_nr_lddr16 = nchar_nr_ld + nchar_nr_dr,
# nchar_nr_lrdd16 = nchar_nr_lr + nchar_nr_dd,
# nchar_nr_lddr_log = log(nchar_nr_lddr16 + 1),
# nchar_nr_lrdd_log = log(nchar_nr_lrdd16 + 1),

# Dichotomized versions: resp_* is 1 when any text was given,
# nonresp_* is 1 when the answer has zero characters.
a16 <- mutate(
    a16,
    resp_like_dem       = as.numeric(nchar_like_dem > 0),
    resp_dislike_dem    = as.numeric(nchar_dislike_dem > 0),
    resp_like_rep       = as.numeric(nchar_like_rep > 0),
    resp_dislike_rep    = as.numeric(nchar_dislike_rep > 0),

    nonresp_like_dem    = as.numeric(nchar_like_dem == 0),
    nonresp_dislike_dem = as.numeric(nchar_dislike_dem == 0),
    nonresp_like_rep    = as.numeric(nchar_like_rep == 0),
    nonresp_dislike_rep = as.numeric(nchar_dislike_rep == 0)
)

# Nonresponse counts per pooled pair (0, 1 or 2 items unanswered) and a
# binary flag that is 1 when at least one item of the pair is unanswered.
a16 <- mutate(
    a16,
    nonresp_lddr16   = nonresp_like_dem + nonresp_dislike_rep,
    nonresp_lrdd16   = nonresp_like_rep + nonresp_dislike_dem,

    nonresp_lddr_bin = as.numeric(nonresp_lddr16 > 0),
    nonresp_lrdd_bin = as.numeric(nonresp_lrdd16 > 0)
)

# Retired "strong response" dummies, kept for reference:
# strongresp_like_dem    = ifelse(nchar_like_dem > 34, 1, 0), #strong response
# strongresp_dislike_dem = ifelse(nchar_dislike_dem > 34, 1, 0),
# strongresp_like_rep    = ifelse(nchar_like_rep > 34, 1, 0),
# strongresp_dislike_rep = ifelse(nchar_dislike_rep > 34, 1, 0),



# Inverse hyperbolic sine (asinh) of each per-item character count.
# Naming quirk to keep in mind: the two "like" columns carry a "16" infix,
# the two "dislike" columns do not.
a16 <- mutate(
    a16,
    nchar_like_dem16_ihs  = asinh(nchar_like_dem),
    nchar_dislike_dem_ihs = asinh(nchar_dislike_dem),
    nchar_like_rep16_ihs  = asinh(nchar_like_rep),
    nchar_dislike_rep_ihs = asinh(nchar_dislike_rep)
)

# anes <- anes %>%
#     mutate(
#         nchar_lddr_ihs         = asinh(nchar_like_dem16_ihs + nchar_dislike_rep_ihs),
#         nchar_lrdd_ihs         = asinh(nchar_like_rep16_ihs + nchar_dislike_dem_ihs),
# 
#         nchar_partisan_pool_ihs   = (nchar_lrdd_ihs - nchar_lddr_ihs)
#         #nchar_align_ihs       = nchar_raw_affect_ihs/max(abs(nchar_raw_affect_ihs))
#     )


# Group maxima, computed within each value of mode16 and repeated on every
# row of that group. They serve as denominators for the normalised measures.
# (mode16 is created earlier in the file - presumably interview mode.)
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        max_nchar_like_dem_ihs    = max(nchar_like_dem16_ihs),
        max_nchar_dislike_dem_ihs = max(nchar_dislike_dem_ihs),
        max_nchar_like_rep_ihs    = max(nchar_like_rep16_ihs),
        max_nchar_dislike_rep_ihs = max(nchar_dislike_rep_ihs),

        max_nchar_lddr = max(nchar_lddr16),
        max_nchar_lrdd = max(nchar_lrdd16)
    )
)


# Mode-normalised text-length measures.
#   *_ihs       : each item's asinh count divided by its within-mode maximum,
#                 then summed over the pair (lddr or lrdd).
#   *_pool_ihs  : asinh of the pooled count over asinh of the pooled maximum.
#   *_pool2_ihs : asinh of (pooled count / pooled maximum).
a16 <- mutate(
    a16,
    nchar_lddr_ihs = nchar_like_dem16_ihs / max_nchar_like_dem_ihs +
        nchar_dislike_rep_ihs / max_nchar_dislike_rep_ihs,
    nchar_lrdd_ihs = nchar_like_rep16_ihs / max_nchar_like_rep_ihs +
        nchar_dislike_dem_ihs / max_nchar_dislike_dem_ihs,

    # half the lrdd-minus-lddr gap
    nchar_align_ihs = (nchar_lrdd_ihs - nchar_lddr_ihs) / 2,

    nchar_lddr_pool_ihs = asinh(nchar_lddr16) / asinh(max_nchar_lddr),
    nchar_lrdd_pool_ihs = asinh(nchar_lrdd16) / asinh(max_nchar_lrdd),

    nchar_lddr_pool2_ihs = asinh(nchar_lddr16 / max_nchar_lddr),
    nchar_lrdd_pool2_ihs = asinh(nchar_lrdd16 / max_nchar_lrdd),

    # note the sign: these two are lddr minus lrdd, the reverse of
    # nchar_align_ihs above
    nchar_partisan_pool_ihs  = nchar_lddr_pool_ihs - nchar_lrdd_pool_ihs,
    nchar_partisan_pool2_ihs = nchar_lddr_pool2_ihs - nchar_lrdd_pool2_ihs

    #nchar_align_ihs       = nchar_raw_affect_ihs/max(abs(nchar_raw_affect_ihs))
)


# anes <- anes %>% 
#     group_by(mode16) %>% 
#       mutate(max_nchar_align_ihs = max(abs(nchar_raw_affect_ihs))) %>% 
#     ungroup() %>% 
#     mutate(
#         nchar_align_ihs  = nchar_raw_affect_ihs/max_nchar_align_ihs
#         )
# 
    

# Directional nonresponse and text-length differences.
#
# nonresp_* is negatively coded (0 = both items answered, 2 = neither), so
# lrdd is subtracted from lddr. The result runs from -2 (pro-Clinton) to
# +2 (pro-Trump): the reverse of the expressive-partisanship arithmetic, but
# with the same direction.
a16 <- mutate(
    a16,
    nonresp_all16 = nonresp_lddr16 - nonresp_lrdd16,

    # labelled factor version; unmatched values (incl. NA) stay NA
    nonresp_all_fct = fct_relevel(
        case_when(
            nonresp_all16 == -2 ~ "-2 Dem",
            nonresp_all16 == -1 ~ "-1 Dem",
            nonresp_all16 ==  0 ~ "0",
            nonresp_all16 ==  1 ~ "1 Rep",
            nonresp_all16 ==  2 ~ "2 Rep"
        ),
        "-2 Dem", "-1 Dem", "0", "1 Rep", "2 Rep"
    ),

    # character-count differences, lrdd minus lddr (raw and logged)
    nchar_all     = nchar_lrdd16 - nchar_lddr16,
    nchar_all_log = nchar_lrdd_log - nchar_lddr_log
)

# Retired alternatives, kept for reference:
#nchar_all_log  = ((nchar_lrdd16 + 1) - (nchar_lddr16 + 1)),
# nchar_all_5    = santoku::chop_equally(nchar_all, 5  ) %>% as.numeric() - 2.5,
# nchar_all_11   = santoku::chop_equally(nchar_all, 11 ) %>% as.numeric() - 5.5,
# nchar_all_21   = santoku::chop_equally(nchar_all, 21 ) %>% as.numeric() - 10.5,
# nchar_all_51   = santoku::chop_equally(nchar_all, 51 ) %>% as.numeric() - 25.5,
# nchar_all_101  = santoku::chop_equally(nchar_all, 101) %>% as.numeric() - 50.5,
# nchar_all_01   = nchar_all/max(nchar_all),
#nonresp_nchar_all16 = (nonresp_all16 + 3) + ((nchar_all_50) / 10 ),
# nonresp_nchar_all2 = (nonresp_all16/max(abs(nonresp_all16))) + (nchar_all/(max(abs(nchar_all)))),
# nonresp_nchar_all_log2 = (nonresp_all16/max(abs(nonresp_all16))) + (nchar_all_log/(max(abs(nchar_all_log))))


# Combined index, built within each mode16 group: the nonresponse difference
# and the character-count difference are each divided by their largest
# absolute value in the group, then added. One version uses the raw count
# difference, the other the logged one.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        nonresp_nchar_all16 =
            nonresp_all16 / max(abs(nonresp_all16)) +
            nchar_all / max(abs(nchar_all)),

        nonresp_nchar_all_log =
            nonresp_all16 / max(abs(nonresp_all16)) +
            nchar_all_log / max(abs(nchar_all_log))
    )
)


# Complements of the 0/1 vote dummies: 1 where the corresponding
# vote_rep16 / vote_dem16 dummy is 0, and 0 where it is 1.
a16 <- mutate(
    a16,
    vote_pres_not_rep = 1 - vote_rep16,
    vote_pres_not_dem = 1 - vote_dem16
)


# Character count of a "problem" open-ended answer.
#
# problem_text: vector of answers.
# Returns a numeric vector of the same length: 0 where the answer is NA or is
# one of the code strings "-5", "-6", "-7", "-8" (don't know), "-9" (refused);
# otherwise the number of characters in the answer.
nchar_prob <- function(problem_text) {
    no_text_codes <- c("-5", "-6", "-7", "-8", "-9")
    counts_as_empty <- is.na(problem_text) | problem_text %in% no_text_codes

    n_chars <- as.numeric(nchar(problem_text))
    n_chars[counts_as_empty] <- 0
    n_chars
}

# Character counts for the four "problem" answers via nchar_prob();
# creates nchar_problem1 ... nchar_problem4.
a16 <- mutate(
    a16,
    across(
        c(problem1, problem2, problem3, problem4),
        nchar_prob,
        .names = "nchar_{.col}"
    )
)

# Within-mode16 maximum of each problem character count;
# creates max_nchar_problem1 ... max_nchar_problem4.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        across(
            c(nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4),
            ~ max(.x),
            .names = "max_{.col}"
        )
    )
)



# asinh-transformed character count of each problem answer.
a16 <- mutate(
    a16,
    nchar_prob1_ihs = asinh(nchar_problem1),
    nchar_prob2_ihs = asinh(nchar_problem2),
    nchar_prob3_ihs = asinh(nchar_problem3),
    nchar_prob4_ihs = asinh(nchar_problem4)
)

# 0/1 indicator per problem answer (1 = at least one character), plus the
# number of problem items with any text (0-4).
a16 <- mutate(
    a16,
    nchar_prob1_bin = as.numeric(nchar_problem1 > 0),
    nchar_prob2_bin = as.numeric(nchar_problem2 > 0),
    nchar_prob3_bin = as.numeric(nchar_problem3 > 0),
    nchar_prob4_bin = as.numeric(nchar_problem4 > 0),
    nchar_prob_bin  =
        nchar_prob1_bin + nchar_prob2_bin + nchar_prob3_bin + nchar_prob4_bin
)

# Totals over the four problem answers: sum of the asinh counts and sum of
# the raw counts.
a16 <- mutate(
    a16,
    nchar_prob_tot_ihs =
        nchar_prob1_ihs + nchar_prob2_ihs + nchar_prob3_ihs + nchar_prob4_ihs,
    nchar_prob_tot =
        nchar_problem1 + nchar_problem2 + nchar_problem3 + nchar_problem4
)

# z-score of the summed asinh counts, standardised within each mode16 group.
# scale() returns a one-column matrix; [, 1] extracts it as a plain vector.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        nchar_prob_tot_ihs_z = scale(nchar_prob_tot_ihs)[, 1]
    )
)


# Within-mode16 maxima of the two problem-text totals.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        max_nchar_prob_tot_ihs = max(nchar_prob_tot_ihs),
        max_nchar_prob_tot     = max(nchar_prob_tot)
    )
)

# Totals normalised by their within-mode maximum:
#   *_ihs_norm : summed asinh counts over their maximum
#   *_norm     : asinh of the raw total over asinh of the raw maximum
a16 <- mutate(
    a16,
    nchar_prob_tot_ihs_norm = nchar_prob_tot_ihs / max_nchar_prob_tot_ihs,
    nchar_prob_tot_norm     = asinh(nchar_prob_tot) / asinh(max_nchar_prob_tot)
)

# Within-mode16 maximum of each asinh problem count;
# creates max_nchar_prob1_ihs ... max_nchar_prob4_ihs.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        across(
            c(nchar_prob1_ihs, nchar_prob2_ihs, nchar_prob3_ihs, nchar_prob4_ihs),
            ~ max(.x),
            .names = "max_{.col}"
        )
    )
)


# Sum over the four problem items of (asinh count / within-mode maximum of
# that asinh count).
a16 <- mutate(
    a16,
    nchar_problems_ihs =
        nchar_prob1_ihs / max_nchar_prob1_ihs +
        nchar_prob2_ihs / max_nchar_prob2_ihs +
        nchar_prob3_ihs / max_nchar_prob3_ihs +
        nchar_prob4_ihs / max_nchar_prob4_ihs
)

# Alternative normalisation, done within each mode16 group: every raw count
# is divided by its group maximum, the four ratios are summed, and asinh is
# applied to the sum. The max_nchar_problem* columns are (re)computed here.
a16 <- ungroup(
    mutate(
        group_by(a16, mode16),
        max_nchar_problem1 = max(nchar_problem1),
        max_nchar_problem2 = max(nchar_problem2),
        max_nchar_problem3 = max(nchar_problem3),
        max_nchar_problem4 = max(nchar_problem4),
        nchar_problems_ihs2 = asinh(
            nchar_problem1 / max_nchar_problem1 +
            nchar_problem2 / max_nchar_problem2 +
            nchar_problem3 / max_nchar_problem3 +
            nchar_problem4 / max_nchar_problem4
        )
    )
)


# Number of problem items (0-4) whose character count is non-zero.
a16 <- mutate(
    a16,
    n_answered16 = rowSums(
        select(a16, nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4) != 0,
        na.rm = TRUE
    )
)
    # deprecated as NAs may be no interview, no data, incomplete
    # nchar_problem1 = ifelse(is.na(problem1), 0, nchar(problem1)),
    # nchar_problem2 = ifelse(is.na(problem2), 0, nchar(problem2)),
    # nchar_problem3 = ifelse(is.na(problem3), 0, nchar(problem3)),
    # nchar_problem4 = ifelse(is.na(problem4), 0, nchar(problem4)),
    # total_nchar_problems = nchar_problem1 + nchar_problem2 + nchar_problem3 + nchar_problem4,

# Problem character counts with a nonresponse offset.
#
# For each item: a zero count is replaced by nonresp_value25, then
# abs(nonresp_value25) is added to every row. nonresp_value25 is the constant
# column set earlier in this script, so the offset is shared by all rows.
#
# Author's notes on the choice of offset:
#   * ANOVA tests suggest ~ -175 maximizes explanatory power with turnout,
#     and max(nchar) / 10 = 191, so a more generalizable rule might be to
#     weight nonresponse as -(max(nchar) * 0.1).
#   * nonresp_value50 is deliberately not redefined here so that the earlier
#     definition takes precedence.
#
# Retired alternatives, kept for reference:
#total_nchar_problems = rowSums(select(., nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4), na.rm = TRUE),
#nonresp_value50  = -50,
# nchar_problems_total = nchar(problem1) + nchar(problem2) + nchar(problem3) + nchar(problem4),
# nchar_problems_max  = max(nchar_problems_total, na.rm = TRUE),
# nonresp_value50  = -abs(nchar_problems_max / 10), # take max value of measure, divide by 10
# nchar_prob_nr_1     = (nchar_problem1 == 0) %>% as.numeric(),
# nchar_prob_nr_2     = (nchar_problem2 == 0) %>% as.numeric(),
# nchar_prob_nr_3     = (nchar_problem3 == 0) %>% as.numeric(),
# nchar_prob_nr_4     = (nchar_problem4 == 0) %>% as.numeric(),
# nchar_nr_prob1_log    = log(ifelse(nchar_problem1 == 0, nonresp_value25, nchar_problem1) + abs(nonresp_value25) + 1),
# nchar_nr_prob2_log    = log(ifelse(nchar_problem2 == 0, nonresp_value25, nchar_problem2) + abs(nonresp_value25) + 1),
# nchar_nr_prob3_log    = log(ifelse(nchar_problem3 == 0, nonresp_value25, nchar_problem3) + abs(nonresp_value25) + 1),
# nchar_nr_prob4_log = log(ifelse(nchar_problem4 == 0, nonresp_value25, nchar_problem4) + abs(nonresp_value25) + 1),
# nchar_nr_prob1    = ifelse(is.na(problem1), nonresp_value25, nchar(problem1)) + abs(nonresp_value25),
# nchar_nr_prob2    = ifelse(is.na(problem2), nonresp_value25, nchar(problem2)) + abs(nonresp_value25),
# nchar_nr_prob3    = ifelse(is.na(problem3), nonresp_value25, nchar(problem3)) + abs(nonresp_value25),
# nchar_nr_prob4 = ifelse(is.na(problem4), nonresp_value25, nchar(problem4)) + abs(nonresp_value25),
#nchar_nr_prob_tot_1k = nchar_nr_prob_tot16 / 1000
a16 <- mutate(
    a16,
    nchar_nr_prob1 =
        ifelse(nchar_problem1 != 0, nchar_problem1, nonresp_value25) + abs(nonresp_value25),
    nchar_nr_prob2 =
        ifelse(nchar_problem2 != 0, nchar_problem2, nonresp_value25) + abs(nonresp_value25),
    nchar_nr_prob3 =
        ifelse(nchar_problem3 != 0, nchar_problem3, nonresp_value25) + abs(nonresp_value25),
    nchar_nr_prob4 =
        ifelse(nchar_problem4 != 0, nchar_problem4, nonresp_value25) + abs(nonresp_value25),

    # total across the four offset counts
    nchar_nr_prob_tot16 =
        nchar_nr_prob1 + nchar_nr_prob2 + nchar_nr_prob3 + nchar_nr_prob4
)

# anes <- anes %>% 
#     mutate(
#     nchar_prob_nr_all   = 4 - rowSums(select(., nchar_prob_nr_1, nchar_prob_nr_2, nchar_prob_nr_3, nchar_prob_nr_4), na.rm = TRUE),
#     nchar_prob_all      = rowSums(select(., nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4), na.rm = TRUE),
#     nchar_prob_nr_comb  = (nchar_prob_nr_all) + (nchar_prob_all / 500)
#     
# 
# ) 

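# rowSums(select(., ...)) must sit in a *later* mutate() call than the one that
# creates the columns it sums: `.` is the data frame as it was piped in, so
# columns created earlier in the same mutate() are not visible to select(., ...).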
# weights <- 1 / 2^(0:3)
# weights <- weights / sum(weights)


# anes <- anes %>% 
#     mutate(
#         total_nchar_problems  = rowSums(select(., nchar_problem1, nchar_problem2, nchar_problem3, nchar_problem4), na.rm = TRUE),
#         nchar_nr_prob_tot16     = rowSums(select(., nchar_nr_prob1, nchar_nr_prob2, nchar_nr_prob3, nchar_nr_prob4), na.rm = TRUE),
#         nchar_nr_prob_tot_log = log(nchar_nr_prob_tot16 + 1),
#         
#         nchar_nr_prob_tot_log2 = rowSums(select(., nchar_nr_prob1_log, nchar_nr_prob2_log, nchar_nr_prob3_log, nchar_nr_prob4_log), na.rm = TRUE),
#         
#         nchar_nr_prob_tot_log3 = rowSums(select(., nchar_nr_prob1_log, nchar_nr_prob2_log, nchar_nr_prob3_log), na.rm = TRUE),
#         
#         nchar_nr_prob1_log_weighted    = weights[1] * nchar_nr_prob1_log, 
#         nchar_nr_prob2_log_weighted    = weights[2] * nchar_nr_prob2_log,
#         nchar_nr_prob3_log_weighted    = weights[3] * nchar_nr_prob3_log,
#         nchar_nr_prob4_log_weighted = weights[4] * nchar_nr_prob4_log
#     )

# anes <- anes %>% 
#     mutate(
#         nchar_nr_prob_tot_log4         = rowSums(select(., nchar_nr_prob1_log_weighted, nchar_nr_prob2_log_weighted, nchar_nr_prob3_log_weighted), na.rm = TRUE)
#         
#     )


# Factor versions of the pooled nonresponse counts, with levels put in
# numeric order by fct_inseq().
a16 <- mutate(
    a16,
    nonresp_lddr16_fct = fct_inseq(as.factor(nonresp_lddr16)),
    nonresp_lrdd16_fct = fct_inseq(as.factor(nonresp_lrdd16))
)


# Registration status, vote intent and early voting, from V161024x.
# Codes 1-4 are labelled; any other value becomes NA. "Reg, Not Early" is
# moved to the front so it is the first factor level.
a16 <- mutate(
    a16,
    reg_intent = fct_relevel(
        case_when(
            V161024x == 1 ~ "Not Reg, No Intent",
            V161024x == 2 ~ "Not Reg, Intent",
            V161024x == 3 ~ "Reg, Not Early",
            V161024x == 4 ~ "Reg, Early"
        ),
        "Reg, Not Early", "Not Reg, No Intent", "Not Reg, Intent", "Reg, Early"
    )
)

# 0/1 dummies: reg_not_bin is 1 when the label contains "Not Reg",
# reg16_bin is its complement.
a16 <- mutate(
    a16,
    reg_not_bin = as.numeric(str_detect(reg_intent, "Not Reg")),
    reg16_bin   = 1 - reg_not_bin
)

# Recode raw ANES items into analysis variables with car::recode().
#
# How to read the recode strings: rules are separated by ";". A range such as
# "-20:-1=NA" sets the negative (missing-data) codes to NA, "101:1000=NA"
# drops out-of-range thermometer codes, and "1=5;2=4;..." reverses a scale.
# Values not matched by any rule are left unchanged.
a16 <- a16 %>% 
    mutate(
        # voting behavior
        # dtvshc = car::recode(V162034a, "1=0; 2=1; else=NA"),
        # dtvsnot = car::recode(V162034a, "c(1,3,4,5,7,9)=0; 2=1; else=NA"),
        # dt2 = car::recode(dtvsnot, "1='For'; 2='Against'; else=NA", as.factor = TRUE),
        
        # white = car::recode(V161310a, "-20:-1=NA;"),
        # race16 = car::recode(V161310x, "-20:0=NA;1='white';2='black';3='asian';4='native';5='hispanic'; 6='other';", as.factor = TRUE),
        # educ = car::recode(V161270, "-20:0=NA; 90:100=NA"),
        
        # demographics
        own_home    = car::recode(V161334, "-20:-1=NA;"),
        religiosity = car::recode(V161245, "-20:-1=NA;"),
        
        # media & knowledge
        attn_news     = car::recode(V161008, "-20:-1=NA;"),
        attn_politics = car::recode(V161003, "-20:-1=NA;"),
        vote12        = car::recode(V161005, "-20:0=NA;"),
        foxweb        = car::recode(V161452, "-20:-1=NA;"),
        thrones       = car::recode(V161389, "-20:-1=NA;"), 
        
        # Do you favor, oppose ... building a wall 
        # NOTE(review): this maps code 2 -> 0 and code 3 -> -1. Confirm against
        # the codebook which of 2/3 is "oppose" and which is "neither"; an
        # ordered scale would normally put the neutral category at 0.
        wall = car::recode(V161196, "-20:-1=NA;1=1;2=0;3=-1;"), 
        
        # How much discrimination is there in the US (scale reversed)
        disc_gay = car::recode(V162361, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"),
        disc_wom = car::recode(V162362, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"),
        disc_men = car::recode(V162363, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"),
        disc_mus = car::recode(V162364, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"),
        disc_chr = car::recode(V162365, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"), 
    
        # How important is being American to identity (scale reversed)
        amer_ident = car::recode(V162332, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"), 
        
        # Is Barack Obama a Muslim...?
        obama_muslim = car::recode(V162255, "-20:0=NA; 2=0; 1=1;"),
        
        # Party ID
        # pid7 = car::recode(V161158x, "-20:0=NA;"),
        # pid3 = car::recode(V161158x, "-20:0=NA; 1:3 = 1; 4 = 2; 5:7 = 3; else=NA"),
        # party2 = car::recode(V161155, "-20:0=NA; 1='Dem'; 2='Rep'; else=NA"),
        
        # Stereotype: Blacks hardworking, 1 = hard, 7 = lazy (reversed here)
        blk_hardwork = car::recode(V162346, "-20:-1=NA;7=1;6=2;5=3;4=4;3=5;2=6;1=7;"),
        # Stereotype: Blacks violent, 1 = peaceful, 7 = violent
        blk_viol = car::recode(V162350, "-20:-1=NA;"),
        # How much influence do blacks have in U.S. politics
        blk_inf = car::recode(V162323, "-20:0=NA; 1=5;2=4;3=3;4=2;5=1;"), 
        
        # When protesters get ‘roughed up’ for disrupting political events, 
        # how much do they generally deserve what happens to them?
        # 1 not at all, 5 a great deal
        prot_rough    = car::recode(V161343, "-20:0=NA;"), 
        
        # black lives matter feeling thermometer
        # ft_blmbin: 0-33 -> 1, 67-100 -> 0, the middle band 34-66 -> NA
        ft_blm        = car::recode(V162113, "-20:-1=NA;101:1000=NA"),
        ft_blmbin     = car::recode(V162113, "-20:-1=NA;0:33=1;34:66=NA;67:100=0;101:1000=NA"),
        
        # POST: feeling thermometers
        ft_trans      = car::recode(V162111, "-20:-1=NA;101:1000=NA"),
        ft_gays       = car::recode(V162103, "-20:-1=NA;101:1000=NA"),
        ft_police     = car::recode(V162110, "-20:-1=NA;101:1000=NA"),
        ft_jews       = car::recode(V162108, "-20:-1=NA;101:1000=NA"),
        ft_muslims    = car::recode(V162106, "-20:-1=NA;101:1000=NA"),
        ft_blacks     = car::recode(V162312, "-20:-1=NA;101:1000=NA"),
        ft_rich       = car::recode(V162105, "-20:-1=NA;101:1000=NA"),
        ft_liberals   = car::recode(V162097, "-20:-1=NA;101:1000=NA"),
        ft_conserv    = car::recode(V162101, "-20:-1=NA;101:1000=NA"),
        ft_feminists  = car::recode(V162096, "-20:-1=NA;101:1000=NA"),
        # NOTE(review): confirm in the codebook that V162095 is the intended
        # "Christians" item and not a neighbouring thermometer.
        ft_christians = car::recode(V162095, "-20:-1=NA;101:1000=NA"),
        # Scientists thermometer. This previously read V162111, which is the
        # variable already used for ft_trans, so the two columns were
        # identical copies. V162112 is the scientists item (between the
        # transgender item V162111 and the BLM item V162113).
        ft_scientists = car::recode(V162112, "-20:-1=NA;101:1000=NA"),
        ft_hispanics  = car::recode(V162311, "-20:-1=NA;101:1000=NA"),
        ft_immigrants = car::recode(V162313, "-20:-1=NA;101:1000=NA"),
        
        # thermometer differences: positive = warmer toward the first-named group
        ft_con_lib    = ft_conserv - ft_liberals,
        
        ft_trump      = car::recode(V162079, "-20:-1=NA;101:1000=NA"),
        ft_clinton    = car::recode(V162078, "-20:-1=NA;101:1000=NA"),
        
        ft_trump_clinton = ft_trump - ft_clinton,

        ft_dem        = car::recode(V161095, "-20:-1=NA;101:1000=NA"),
        ft_rep        = car::recode(V161096, "-20:-1=NA;101:1000=NA"),
        ft_rep_dem    = ft_rep - ft_dem,
        
        # How worried about terrorist attack in next 12 months (scale reversed)
        terror_attack = car::recode(V162294, "-20:-1=NA; 1=5;2=4;3=3;4=2;5=1;")
        
    )



# Pre-election candidate feeling thermometers (V161086 -> Clinton,
# V161087 -> Trump) and their difference (Trump minus Clinton).
#
# Valid ratings are kept when they fall in 0-100; everything else (negative
# missing-data codes, any out-of-range code above 100) becomes NA. The lower
# bound is inclusive: a rating of 0 is a real answer and must not be dropped,
# matching how the post-election thermometers in this script are recoded.
# as.numeric() keeps both case_when() branches the same type.
a16 <- a16 %>% 
    mutate(
        ft_pre_clinton = case_when(
            V161086 >= 0 & V161086 <= 100 ~ as.numeric(V161086),
            TRUE                          ~ NA_real_
        ),
        ft_pre_trump = case_when(
            V161087 >= 0 & V161087 <= 100 ~ as.numeric(V161087),
            TRUE                          ~ NA_real_
        ),
        ft_pre_tr_cl = ft_pre_trump - ft_pre_clinton
        
    )


# Vote-likelihood items. Positive codes of V161133 and V161238 are flipped
# with 6 - x; non-positive or missing codes become NA.
a16 <- mutate(
    a16,
    likely_vote1 = case_when(V161133 > 0 ~ 6 - V161133, TRUE ~ NA_real_),
    likely_vote2 = case_when(V161238 > 0 ~ 6 - V161238, TRUE ~ NA_real_)
)

# Combined measure: take likely_vote1 when it is positive, otherwise
# likely_vote2 when that is positive, otherwise NA.
a16 <- mutate(
    a16,
    likely_vote16 = case_when(
        likely_vote1 > 0 ~ likely_vote1,
        likely_vote2 > 0 ~ likely_vote2,
        TRUE             ~ NA_real_
    )
)


## ---- impute-missingness-2016 ----

# skipping racial_resent16, ft... due to post-wave drop out
# i.e., structural missingness
# 
#Subset of key variables with missingness + relevant predictors
# vars_for_impute <- c("income16", "age16", "educ16", "female16")

# Create subset for imputation
# imp_data <- anes[ , vars_for_impute]
#
# # Run imputation with Predictive Mean Matching (PMM) — flexible + safe for non-normal data
# imp <- mice(imp_data, m = 1, method = "pmm", seed = 123)
#
# # Get completed data
# imputed_df <- complete(imp, 1)
#
# # Replace original variables (optional: rename with `_imp` suffix if you want to track changes)
# anes[ , vars_for_impute] <- imputed_df


# Amelia requires all variables to be numeric or factor
# amelia_out <- amelia(anes[ , vars_for_impute], 
#                      m = 1, 
#                      idvars = NULL, # specify if any variables shouldn't be imputed
#                      noms = c("female16", "race16"), # categorical variables
#                      seed = 123)
# 
# # Extract imputed data
# anes[ , vars_for_impute] <- amelia_out$imputations[[1]]


# ANES Wave harmonization

# a16 <- a16 %>%
#     rename(
        # Standardize text response names (add text_ prefix for clarity)
        # text_like_dem = like_dem,
        # text_dislike_dem = dislike_dem,
        # text_like_rep = like_rep,
        # text_dislike_rep = dislike_rep,
        
        # Standardize voting variables for consistency across waves
        #vote_validated = vote_validated16  # Match 2020/2024 naming
    # )


# Write results to text_data_output/: the processed respondent-level data
# frame in one .Rdata file, the four tokenised tables together in another.
save(
    list = "a16",
    file = here::here("text_data_output", "anes2016_processed.Rdata")
)

save(
    list = c("tidy_dem", "tidy_rep", "tidy_all", "tidy_problems"),
    file = here::here("text_data_output", "anes2016_tidy_text.Rdata")
)

